# -*- coding: utf-8 -*-
"""
"Option Return Predictability with Machine Learning and Big Data"

by 

Turan G. Bali, Heiner Beckmeyer, Mathis Moerke, and Florian Weigert

January 2023
"""
import pandas as pd
import numpy as np
from pylatex import LongTable, MultiColumn

from pylatex import (
    NoEscape,
    Command,
)

def set_mapper(src, level):
    """Return the one-letter information-set code for a feature.

    Parameters
    ----------
    src
        Information source; ``"Stock"`` always yields ``"S"``.
    level
        Instrument level, consulted only when ``src`` is not ``"Stock"``:
        ``"Underlying"`` -> ``"O"``, ``"Bucket"`` -> ``"B"``,
        ``"Contract"`` -> ``"I"``.

    Returns
    -------
    The matching code, or ``None`` when ``src`` is not ``"Stock"`` and
    ``level`` is none of the three known levels.
    """
    if src == "Stock":
        return "S"

    level_codes = (
        ("Underlying", "O"),
        ("Bucket", "B"),
        ("Contract", "I"),
    )
    for known_level, code in level_codes:
        if level == known_level:
            return code
    return None
    

def cite(key):
    """Wrap ``key`` in a ``citeoa`` LaTeX command object with ``key`` as its sole argument."""
    citation = Command("citeoa", arguments=[key])
    return citation

# True: rebuild the feature overview from the Excel sheet and cache it as parquet.
# False: load the previously cached parquet file instead.
flag_prepare = True
filename = "../../04_results/option_sample/analysis/features.xlsx"

# %%%
# Prepare raw Excel file

if flag_prepare:
    # The context manager closes the workbook handle once the sheet is read
    # (the handle was previously left open for the rest of the script).
    with pd.ExcelFile(filename, engine="openpyxl") as xls:
        features = pd.read_excel(xls, "feature_info")

    # Keep the first six columns; copy so the column assignments below act on
    # an independent frame rather than on a selection of the sheet data.
    features = features[features.columns[:6]].copy()

    # Collapse the source label: "Options" becomes "Option", anything else "Stock".
    features["Information Source"] = features["Information Source"].apply(
        lambda x: "Option" if x == "Options" else "Stock"
    )
    # One-letter code derived from source and instrument level (see set_mapper).
    features["Information Set"] = features.apply(
        lambda x: set_mapper(x["Information Source"], x["Instrument Level"]),
        axis=1,
    )
    # Fix the column order used for the table.
    features = features[
        [
            "Feature",
            "Description",
            "Source",
            "Information Source",
            "Instrument Level",
            "Information Set",
            "Group",
        ]
    ]
    features.to_parquet("../../03_data/features_overview.pq")

else:
    features = pd.read_parquet("../../03_data/features_overview.pq")

# String sources become citation commands; non-string entries become empty cells.
features.Source = features.Source.apply(lambda x: cite(x) if isinstance(x, str) else "")

# %%
# Create Longtable
label = "tab:char"
caption = "captionchar"
# Output path of the generated table; rebinds the name used for the Excel input.
filename = "../../04_results/summary_stats/feature_group_classification.tex"

ncol = features.shape[1]
# Two left-aligned columns, one 4cm paragraph column, the rest left-aligned.
tabular = LongTable("ll" + "p{4cm}" + "l" * (ncol - 3), booktabs=True)
cols = features.columns

# Header shown on the first page only.
tabular.add_row(cols)
tabular.add_hline()
tabular.append(Command("endfirsthead"))

# Header repeated on every following page.
# The backslash before the space is now escaped explicitly: "\ " is an invalid
# escape sequence in a normal string literal and triggers a SyntaxWarning on
# recent Python versions. The resulting runtime string is unchanged.
tabular.add_row(
    (MultiColumn(ncol, align="l", data=NoEscape("Table \\thetable \\ from previous page")),)
)
tabular.add_hline()
tabular.add_row(cols)
tabular.add_hline()
tabular.end_table_header()

# Footer for pages that are followed by another page of the table.
tabular.add_hline()
tabular.add_row((MultiColumn(ncol, align="r", data="Continued on Next Page"),))
tabular.add_hline()
tabular.end_table_footer()

# Footer for the last page of the table.
tabular.add_row((MultiColumn(ncol, align="r", data="Not Continued on Next Page"),))
tabular.add_hline()
tabular.end_table_last_footer()

# One table row per feature.
for _, row in features.iterrows():
    tabular.add_row(row)

# generate_tex is given the path without the ".tex" suffix.
tabular.generate_tex(filename.replace(".tex", ""))

# Read back the generated table so it can be patched in place.
# NOTE(review): assumes pylatex wrote the file as UTF-8 — confirm.
with open(filename, encoding="utf-8") as f:
    lines = f.readlines()

# adjust references with "{-}"
lines = [line.replace("{-}", "-") for line in lines]

# insert caption and label directly before the first line containing \toprule
idx_tabular = next(
    (i for i, line in enumerate(lines) if "\\toprule" in line),
    None,
)
if idx_tabular is None:
    # Fail with a clear message instead of an opaque IndexError.
    raise ValueError(f"no \\toprule line found in {filename}")
caption_label = "\\caption{\\" + caption + "} \\label{" + label + "} \\\\%\n"
lines.insert(idx_tabular, caption_label)

# Rewrite the file in one pass: marker comment first, then the patched lines.
with open(filename, mode="w", encoding="utf-8") as f:
    f.write("% generated by python\n")
    f.writelines(lines)
print(filename)
